# -*- coding: utf-8 -*-
# @Time        :2024/10/31 下午5:05
# @Author      :文刀水寿
# @File        : 03_数据计算_案例1.py
"""
 @Description :单词计数统计
"""
# 1. Build the Spark execution-environment entry object.
import os

from pyspark import SparkConf, SparkContext

# Point PySpark at the local Python interpreter (adjust to your installation).
os.environ['PYSPARK_PYTHON'] = "D:/Python/python.exe"

conf = SparkConf().setMaster("local[*]").setAppName("test_spark")
sc = SparkContext(conf=conf)

# 2. Read the input data file (one line of text per element).
rdd = sc.textFile("D:/spark案例1.txt")

# 3. Split every line on spaces and flatten into a single RDD of words.
words_rdd = rdd.flatMap(lambda line: line.split(" "))

# 4. Map each word to a (word, 1) pair so occurrences can be summed per key.
pairs_rdd = words_rdd.map(lambda word: (word, 1))

# 5. Sum the 1s for each distinct word key to get per-word counts.
counts_rdd = pairs_rdd.reduceByKey(lambda a, b: a + b)

# 6. Collect and print the final (word, count) results.
print(counts_rdd.collect())

# Release Spark resources now that the job has finished (was missing: leak).
sc.stop()
